In [10]:
import pandas as pd
# Read the example workbook into a DataFrame.
excel_path = "sheet_1_with_simple_logic.xls"
df = pd.read_excel(excel_path)
# Plain-text dump of the whole frame.
print(df)
In [11]:
df.head()  # rendered as a table view (non-interactive, but prettier than print)
# NOTE: head() shows only the first 5 rows by default and we have 6 rows,
# so the last row is missing from this output.
Out[11]:
In [19]:
# Ask for up to 10 rows so that all 6 rows of this small sheet are displayed.
df.head(10)
Out[19]:
In [12]:
# The column labels are exposed as an Index object on the DataFrame.
column_labels = df.columns
print("Column names:", column_labels)
In [13]:
# df.info() summarises the frame column by column (name, non-null count, dtype)
# and writes its report straight to stdout, so it needs no print() wrapper.
# The heading previously said "each row", which misdescribed that output.
print("Information about each column including data types:")
print("(note - type 'object' is catch-all that includes strings)")
df.info()
In [14]:
# Indexing the DataFrame with a single column label yields that column.
feature1_column = df['Feature1']
print("\nWe can extract a column of data as a Series object:")
print(feature1_column)
In [15]:
# The .ix indexer was deprecated in pandas 0.20 and removed in pandas 1.0;
# .iloc[0] selects the first row by position.
row = df.iloc[0]
# NOTE: the row comes back as a pandas Series keyed by column name, so it can
# be indexed like a dictionary (row['Feature1']) even though it is not a dict.
print("\nWe can extract a row as a Python dictionary:")
print(row)
In [16]:
# Individual fields of the extracted row are looked up by column label.
feature1_value = row['Feature1']
print("\nRow items, e.g. Feature1={feature1}".format(feature1=feature1_value))
In [17]:
def multiply_feature1_by_2(cell):
    """Return ``cell`` multiplied by 2.

    Written to be passed to ``Series.apply``, which calls it once per value.

    Parameters
    ----------
    cell
        A single value; anything that supports ``* 2``.

    Returns
    -------
    The result of ``cell * 2``.
    """
    return cell * 2
# Apply the function element-by-element to every value in a Series
# (df['Feature1'] pulls the Feature1 column out as that Series).
df['Feature1'].apply(multiply_feature1_by_2)
# Note: this doesn't change the DataFrame; apply() generates a new, separate Series.
# Here the result is only shown as the cell's output and then discarded.
Out[17]:
In [20]:
# Run the doubling function over the Feature1 column, keep the resulting
# Series, and attach it to the DataFrame under a new column label.
feature1_series = df['Feature1']
new_result = feature1_series.apply(multiply_feature1_by_2)
df['Feature1_Times_2'] = new_result
# Show up to 10 rows so the new column is visible for the whole sheet.
df.head(10)
Out[20]:
In [ ]: